#!/bin/bash
# install-arrow-openeuler.sh
# 功能：在 openEuler 25 上编译安装 Apache Arrow C++ + pyarrow
# 兼容缺少 python3-numpy-devel 的情况

# Strict mode:
#   errexit  - stop at the first command that fails
#   nounset  - treat expansion of an unset variable as an error, so a
#              mistyped name cannot silently expand to an empty string
#   pipefail - a pipeline fails if any of its stages fails
set -euo pipefail

echo "🚀 开始安装 Apache Arrow C++ 和 pyarrow..."

# ========================
# 1. Install system dependencies (python3-numpy-devel is left out on purpose)
# ========================
echo "📦 安装系统依赖..."
# The package list lives in an array so individual entries can carry a
# comment. A comment placed after a trailing backslash stops the backslash
# from continuing the line: the command ends there and the following lines
# are executed as separate commands.
system_packages=(
    git cmake gcc gcc-c++ make pkgconfig
    python3-devel            # Python C API
    zlib-devel bzip2-devel
    lz4-devel zstd-devel
    snappy-devel
    boost-devel
)
sudo dnf install -y "${system_packages[@]}"

# ========================
# 2. Install numpy through pip (the package includes its development headers)
# ========================
printf '%s\n' "💡 通过 pip 安装 numpy（替代 python3-numpy-devel）..."
uv pip install numpy

# Confirm numpy imports and print the header directory it reports.
python -c "import numpy; print(f'NumPy 头文件路径: {numpy.get_include()}')"

# ========================
# 3. Choose the Arrow version and the working directory
# ========================
ARROW_VERSION='17.0.0'
BUILD_DIR='/tmp/arrow-build'

# Create the working directory if needed and make it the current directory;
# the later steps run relative to it.
mkdir -p -- "${BUILD_DIR}"
cd -- "${BUILD_DIR}"

# ========================
# 4. Download the Arrow sources
# ========================
echo "⬇️ 下载 Apache Arrow ${ARROW_VERSION}..."
# A checkout left behind by an earlier run would make `git clone` abort,
# because it refuses to clone into an existing non-empty directory. Remove
# it first so the script can be re-run. `:?` aborts instead of expanding to
# "/arrow" if BUILD_DIR is unset or empty.
rm -rf -- "${BUILD_DIR:?}/arrow"

# Shallow clone of the release tag only; full history is not needed to build.
git clone --branch "apache-arrow-${ARROW_VERSION}" --depth=1 \
    https://github.com/apache/arrow.git "${BUILD_DIR}/arrow"

cd "${BUILD_DIR}/arrow/cpp"

# ========================
# 5. Build and install Arrow C++
# ========================
mkdir -p build
cd build

# Configure options: a Release build installed under /usr/local, with the
# Arrow components and compression codecs switched on below.
cmake_flags=(
    -DCMAKE_BUILD_TYPE=Release
    -DCMAKE_INSTALL_PREFIX=/usr/local
    -DARROW_PYTHON=ON
    -DARROW_COMPUTE=ON
    -DARROW_DATASET=ON
    -DARROW_CSV=ON
    -DARROW_JSON=ON
    -DARROW_WITH_ZSTD=ON
    -DARROW_WITH_LZ4=ON
    -DARROW_WITH_SNAPPY=ON
)
cmake .. "${cmake_flags[@]}"

printf '%s\n' "🔨 编译 Arrow C++..."
# Run as many parallel make jobs as nproc reports.
make -j"$(nproc)"

printf '%s\n' "✅ 安装到 /usr/local"
# Installing into /usr/local needs root, hence sudo for this step only.
sudo make install

# ========================
# 6. Install pyarrow (linked against the locally built Arrow)
# ========================
echo "📦 安装 pyarrow..."
cd "$BUILD_DIR/arrow/python"

# Tell the pyarrow build to use the locally compiled Arrow.
export PYARROW_BUNDLE_ARROW_CPP=0
export PYARROW_BUILD_TYPE=release
export PYARROW_WITH_COMPUTE=1
export PYARROW_WITH_DATASET=1

# Key setting: the prefix Arrow C++ was installed into.
export ARROW_HOME=/usr/local

# With the Arrow libraries not bundled into the Python package, the dynamic
# loader has to locate them under ARROW_HOME when pyarrow is imported.
# Any LD_LIBRARY_PATH already set by the caller is kept at the end.
# NOTE(review): whether the libraries land in lib or lib64 depends on the
# platform's install-dir convention, so both are listed - confirm on the
# target system. The export only lasts for this script; add it to the shell
# profile if `import pyarrow` later fails to find libarrow.
export LD_LIBRARY_PATH="${ARROW_HOME}/lib64:${ARROW_HOME}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

# Install pyarrow from this source tree; --no-binary rules out a prebuilt wheel.
uv pip install --no-binary=pyarrow .

# ========================
# 7. Verify
# ========================
# Import pyarrow and numpy, print pyarrow's version and location, and build a
# small array. Any Python failure is reported and ends the script with
# status 1.
if ! python -c "
import pyarrow as pa
import numpy as np
print('🎉 PyArrow 安装成功！')
print('版本:', pa.__version__)
print('库路径:', pa.__file__)
print('创建一个 Array:', pa.array([1, 2, 3]))
"; then
    printf '%s\n' "❌ 安装失败"
    exit 1
fi

printf '%s\n' "✅ Apache Arrow 和 pyarrow 安装完成！"
